home *** CD-ROM | disk | FTP | other *** search
- /* file merge_files.c ... 870902-... ^z
- * more functions to do the work of merging subindices together ...
- * with buffering of inputs and outputs ...
- */
-
-
-
- #include <stdio.h>
- #include <unix.h>
- #include <storage.h>
- #include <strings.h>
- #include <ctype.h>
- #include <proto.h>
- #include "qndxr.2.h"
-
-
- /* function to do the real work of merging sorted k & p
- * files into a single sorted file...
- *
- * Procedure is as follows:
- *
- * Compare the current key records from each of the N files to be merged.
- * Take the smallest of the keys (or, if there is a tie, take the one
- * from the earlier file number) and write its pointer records out to
- * the *.p file for the next generation. If the key is a new one, that
- * is, if it differs from the previous key, write out the previous key
- * word and the value for cumulative_counts to the *.k file, and reset
- * the previous key's value to that of the current key. Then repeat
- * this whole procedure, until all the input files are exhausted.
- *
- * The above works provided that we are careful in choosing the smallest
- * key and never let a file that has been exhausted (reached EOF) win a
- * comparison. The function smallest_key does that properly below, since
- * a file that is at EOF has returned a NULL pointer for its key_rec...
- *
- * For each file, the variable prev_cc[i] holds the previous value of
- * cumulative_counts for that file, so that we can determine how
- * many ptr_recs to write out by doing a simple subtraction.
- *
- * Buffer numbering scheme: the Kth input file has zbuffer #K
- * for its keys and zbuffer #(K+n) for its pointers; the output
- * buffers are zbuffers #(2*n) for keys and #(2*n+1) for pointers.
- */
-
- void nway_merge_kpfiles (ink, inp, outk, outp, n)
- FILE *ink[], *inp[], *outk, *outp;
- register int n;
- {
- register int i;
- KEY_REC *kr[NMERGE], prev_key;
- long prev_cc[NMERGE], out_cc;
- extern long zbufsiz;
- char *strncpy();
-
- DEBUG ("--entering nway_merge_kpfiles with n=%d\n", n);
- for (i = 0; i < n; ++i)
- prev_cc[i] = 0;
- out_cc = 0;
-
- for (i = 0; i < n; ++i)
- {
- create_zbuffer (i, zbufsiz, ink[i], sizeof(KEY_REC));
- create_zbuffer (i + n, zbufsiz, inp[i], sizeof(long));
- }
- create_zbuffer (2 * n, zbufsiz, outk, sizeof(KEY_REC));
- create_zbuffer (2 * n + 1, zbufsiz, outp, sizeof(long));
-
- for (i = 0; i < n; ++i)
- kr[i] = (KEY_REC *)next_input_item (i);
-
- i = smallest_key (kr, n);
- strncpy (prev_key.kkey, kr[i]->kkey, KEY_LENGTH);
- DEBUG ("--first key is %.28s\n", prev_key.kkey);
-
- while (merge_not_finished (kr, n))
- {
- i = smallest_key (kr, n);
- if (zstrcmp (prev_key.kkey, kr[i]->kkey))
- {
- copy_key_rec (prev_key.kkey, out_cc, 2 * n);
- strncpy (prev_key.kkey, kr[i]->kkey, KEY_LENGTH);
- }
- copy_ptr_recs (i + n, kr[i]->ccount - prev_cc[i], 2 * n + 1);
- out_cc += kr[i]->ccount - prev_cc[i];
- prev_cc[i] = kr[i]->ccount;
- kr[i] = (KEY_REC *)next_input_item (i);
- }
-
- copy_key_rec (prev_key.kkey, out_cc, 2 * n);
- DEBUG ("--finished nway merge ... final key=%.28s\n", prev_key.kkey);
- flush_zbuffer (2 * n);
- flush_zbuffer (2 * n + 1);
- for (i = 0; i < 2 * n + 2; ++i)
- free_zbuffer (i);
- }
-
-
- /* function to copy a chosen number of ptr_recs (longs) from one file
- * to another ... used in merging two files by merge_kpfiles() above....
- * revised and simplified to use zbuffers....870902 ... ^z
- */
-
- void copy_ptr_recs (inzbuf, count, outzbuf)
- register int inzbuf, outzbuf;
- register long count;
- {
- register long *inp, *outp;
- char *next_input_item(), *next_output_item();
-
- for ( ; count > 0; --count)
- {
- inp = (long *)next_input_item (inzbuf);
- outp = (long *)next_output_item (outzbuf);
- *outp = *inp;
- }
- }
-
-
- /* function to write a key record including kkey (key word) and ccount
- * (cumulative_count) to an output file...
- */
-
- void copy_key_rec (kkey, cc, outzbuf)
- char *kkey;
- long cc;
- int outzbuf;
- {
- KEY_REC *outp;
- char *strncpy(), *next_output_item();
-
- outp = (KEY_REC *)next_output_item (outzbuf);
- strncpy (outp->kkey, kkey, KEY_LENGTH);
- outp->ccount = cc;
- }
-
-
- /* simple function to see if we are not yet finished with all of the input
- * files coming in to the merge operation ... signified by at least one of
- * the key pointers being non-NULL....
- */
-
- int merge_not_finished (kr, n)
- KEY_REC *kr[];
- register int n;
- {
- register int i;
-
- for (i = 0; i < n; ++i)
- if (kr[i] != NULL)
- return (TRUE);
-
- return (FALSE);
- }
-
-
- /* function to determine the smallest of the n keys pointed to by the
- * kr[] vector of pointers to KEY_RECs ... note that a NULL ptr ranks
- * after any other...also note that in case of a tie, the smaller
- * value of i is the one to return (for a stable merging sort)
- */
-
- smallest_key (kr, n)
- KEY_REC *kr[];
- register int n;
- {
- register int i, smallest;
-
- for (i = 0; i < n; ++i)
- if (kr[i] != NULL)
- {
- smallest = i;
- break;
- }
-
- for (i = smallest + 1; i < n; ++i)
- if (kr[i] != NULL)
- if (zstrcmp (kr[smallest]->kkey, kr[i]->kkey) > 0)
- smallest = i;
-
- return (smallest);
- }
-
-
-